library(cytomapper)
library(dplyr)
library(ggplot2)
library(simpleSeg)
library(FuseSOM)
library(ggpubr)
library(scater)
library(spicyR)
#library(ClassifyR)
library(ClassifyR, lib.loc = "~/localPackages/")
library(scFeatures)
library(lisaClust)
#withr::with_libpaths(new = "~/localPackages/", devtools::install_github("SydneyBioX/ClassifyR"))
nCores <- 20
BPPARAM <- simpleSeg:::generateBPParam(nCores)
Our images are stored in the images folder within the
Data folder. Here we use readImages() from the
EBImage package to read these into R. If memory is a
restricting factor, and the files are in a slightly different format,
you could use loadImages() from the cytomapper
package to load all of the tiff images into a CytoImageList
object, which can store the images as h5 on-disk.
pathToImages <- "Data/images"
# Get directories of images
imageDirs <- dir(pathToImages, full.names = TRUE)
names(imageDirs) <- dir(pathToImages, full.names = FALSE)
# Get files in each directory
files <- sapply(imageDirs, list.files, pattern = "tif", full.names = TRUE, simplify = FALSE)
# Read files with readImage from EBImage
images <- lapply(files, EBImage::readImage, as.is = TRUE)
# We've found it useful to sqrt transform our images
images <- lapply(images, sqrt)
We will make use of the on_disk option to convert our
images to a CytoImageList with the images not held in
memory.
# Store images in a CytoImageList with images on_disk as h5 files to save memory.
dir.create("Data/h5Files")
images <- CytoImageList(images, on_disk = TRUE, h5FilesPath = "Data/h5Files", BPPARAM = BPPARAM)
mcols(images) <- S4Vectors::DataFrame(imageID = names(images))
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 15461352 825.8 27433958 1465.2 20973335 1120.1
## Vcells 25292943 193.0 6813011224 51979.2 8516264030 64974.0
# images <- cytomapper::loadImages("Data/h5Files/", pattern = "h5", h5FilesPath = "Data/h5Files/",on_disk = TRUE)
# mcols(images) <- S4Vectors::DataFrame(imageID = names(images))
masks <- simpleSeg(images,
nucleus = c("PCA", "HH3"),
cellBody = "dilate",
sizeSelection = 40,
discSize = 5,
cores = nCores)
display(colorLabels(masks[[1]]))
plotPixels(image = images[1:3],
mask = masks[1:3],
img_id = "imageID",
colour_by = c("PanKRT", "GLUT1", "HH3", "CD3", "CD20"),
display = "single",
colour = list(HH3 = c("black","blue"),
CD3 = c("black","purple"),
CD20 = c("black","green"),
GLUT1 = c("black", "red"),
PanKRT = c("black", "yellow")),
legend = NULL)
cells <- cytomapper::measureObjects(masks,
images,
img_id = "imageID",
BPPARAM = BPPARAM)
clinical <- read.csv("Data/1-s2.0-S0092867421014860-mmc1.csv")
clinical <- clinical |>
mutate(imageID = paste0("Point", PointNumber, "_pt", Patient_ID, "_", TMAD_Patient))
clinical$imageID[grep("normal", clinical$Tissue_Type)] <- paste0(clinical$imageID[grep("normal", clinical$Tissue_Type)], "_Normal")
clinicalVariables <- c("imageID", "Patient_ID","Status", "Age", "SUBTYPE", "PAM50", "Treatment", "DCIS_grade", "Necrosis")
rownames(clinical) <- clinical$imageID
clinical <- clinical[names(images), ]
colData(cells) <- cbind(colData(cells), clinical[cells$imageID, clinicalVariables])
df <- as.data.frame(cbind(colData(cells), t(assay(cells, "counts"))))
ggplot(df, aes(x = (PanKRT), colour = imageID)) + geom_density() + theme(legend.position = "none")
#cells <- normalizeCells(cells, transformation = "sqrt", assayIn = "counts")
#source("scMerge2.R")
cells <- normalizeCells(cells, transformation = c("sqrt"), method = c("quant99", "minMax"), assayIn = "counts", cores = nCores)
df <- as.data.frame(cbind(colData(cells), t(assay(cells, "norm"))))
ggplot(df, aes(x = (PanKRT), colour = imageID)) + geom_density() + theme(legend.position = "none")
cells@metadata <- list()
#The markers used in the original publication to gate cell types
useMarkers <- c("PanKRT", "ECAD", "CD45", "CK7", "VIM", "FAP", "CK5", "CD36",
"CD31", "CD20", "CD4", "CD3", "CD8", "CD14", "CD11c", "CD68",
"HLADRDPDQ", "SMA")
set.seed(51773)
cells <- runFuseSOM(cells, markers = useMarkers, assay = 'norm', numClusters = 25)
# As I've already run runFuseSOM I don't need to run generateSOM()
cells <- estimateNumCluster(cells, kseq = 2:30)
##
========
================
========================
================================
========================================
================================================
========================================================
================================================================
========================================================================
================================================================================
optiPlot(cells, method = "gap")
plotGroupedHeatmap(cells,
features = useMarkers,
group = "clusters",
exprs_values = "norm",
center = TRUE,
scale = TRUE,
zlim = c(-2,2))
sort(table(colData(cells)$clusters))
##
## cluster_5 cluster_23 cluster_14 cluster_7 cluster_8 cluster_12 cluster_3
## 129 338 358 364 377 439 490
## cluster_4 cluster_21 cluster_9 cluster_25 cluster_10 cluster_2 cluster_11
## 539 553 571 787 967 1261 1398
## cluster_19 cluster_16 cluster_18 cluster_17 cluster_6 cluster_15 cluster_24
## 1417 1574 1653 1859 2051 2277 2559
## cluster_22 cluster_20 cluster_13 cluster_1
## 3200 4027 4991 28351
imagesToUse <- rownames(clinical)[clinical[, "Status"]%in%c("nonprogressor", "progressor")]
cellsToUse <- cells$Status%in%c("nonprogressor", "progressor")
testProp <- colTest(cells[, cellsToUse],
condition = "Status",
feature = "clusters",
type = "ttest")
testProp
## mean in group nonprogressor mean in group progressor t pval
## cluster_22 0.0650 0.03700 2.800 0.0074
## cluster_17 0.0380 0.02600 2.100 0.0450
## cluster_19 0.0270 0.01400 2.000 0.0460
## cluster_23 0.0033 0.01100 -1.500 0.1600
## cluster_24 0.0510 0.03400 1.300 0.2200
## cluster_8 0.0031 0.00820 -1.200 0.2400
## cluster_1 0.4500 0.49000 -1.100 0.2700
## cluster_21 0.0072 0.00420 1.000 0.3100
## cluster_5 0.0021 0.00087 1.000 0.3100
## cluster_18 0.0290 0.02300 0.990 0.3300
## cluster_11 0.0240 0.02000 0.900 0.3800
## cluster_9 0.0096 0.01300 -0.880 0.3900
## cluster_16 0.0200 0.02600 -0.660 0.5200
## cluster_14 0.0055 0.00730 -0.590 0.5600
## cluster_15 0.0340 0.03000 0.530 0.6000
## cluster_20 0.0580 0.07200 -0.520 0.6100
## cluster_3 0.0037 0.00460 -0.460 0.6500
## cluster_25 0.0110 0.01300 -0.410 0.6800
## cluster_6 0.0390 0.03400 0.420 0.6800
## cluster_7 0.0082 0.00610 0.410 0.6800
## cluster_12 0.0072 0.01100 -0.410 0.6900
## cluster_2 0.0160 0.01800 -0.330 0.7500
## cluster_10 0.0120 0.01300 -0.310 0.7600
## cluster_13 0.0710 0.07200 -0.086 0.9300
## cluster_4 0.0084 0.00860 -0.087 0.9300
## adjPval cluster
## cluster_22 0.18 cluster_22
## cluster_17 0.38 cluster_17
## cluster_19 0.38 cluster_19
## cluster_23 0.81 cluster_23
## cluster_24 0.81 cluster_24
## cluster_8 0.81 cluster_8
## cluster_1 0.81 cluster_1
## cluster_21 0.81 cluster_21
## cluster_5 0.81 cluster_5
## cluster_18 0.81 cluster_18
## cluster_11 0.81 cluster_11
## cluster_9 0.81 cluster_9
## cluster_16 0.82 cluster_16
## cluster_14 0.82 cluster_14
## cluster_15 0.82 cluster_15
## cluster_20 0.82 cluster_20
## cluster_3 0.82 cluster_3
## cluster_25 0.82 cluster_25
## cluster_6 0.82 cluster_6
## cluster_7 0.82 cluster_7
## cluster_12 0.82 cluster_12
## cluster_2 0.83 cluster_2
## cluster_10 0.83 cluster_10
## cluster_13 0.93 cluster_13
## cluster_4 0.93 cluster_4
prop <- getProp(cells, feature = "clusters")
clusterToUse <- rownames(testProp)[1]
boxplot( prop[imagesToUse, clusterToUse] ~ clinical[imagesToUse, "Status"] )
set.seed(51773)
cells <- runUMAP(cells, subset_row = useMarkers, exprs_values = "norm")
plotReducedDim(cells[,colData(cells)$imageID %in% unique(colData(cells)$imageID)[c(c(1,20,40,50,60),c(1,20,40,50,60)+2)]], dimred="UMAP", colour_by="imageID")
plotReducedDim(cells[,colData(cells)$imageID %in% unique(colData(cells)$imageID)[c(c(1,20,40,50,60),c(1,20,40,50,60)+2)]], dimred="UMAP", colour_by="clusters")
spicyTest <- spicy(cells[, cellsToUse],
condition = "Status",
cellType = "clusters",
imageID = "imageID",
spatialCoords = c("m.cx", "m.cy"),
Rs = c(20, 50, 100),
sigma = 50,
BPPARAM = BPPARAM)
topPairs(spicyTest, n = 10)
## intercept coefficient p.value adj.pvalue from
## cluster_25__cluster_1 -116.2825 63.12200 0.0007060349 0.1542898 cluster_25
## cluster_1__cluster_25 -113.7198 58.28772 0.0008580480 0.1542898 cluster_1
## cluster_25__cluster_13 -107.1412 86.90733 0.0010431800 0.1542898 cluster_25
## cluster_24__cluster_7 -160.8764 69.21451 0.0012028068 0.1542898 cluster_24
## cluster_13__cluster_25 -103.9409 84.83352 0.0012382808 0.1542898 cluster_13
## cluster_3__cluster_16 -161.8938 70.53442 0.0018794192 0.1951464 cluster_3
## cluster_16__cluster_3 -162.0196 61.66160 0.0030316699 0.2596910 cluster_16
## cluster_7__cluster_24 -159.4935 84.31748 0.0033347161 0.2596910 cluster_7
## cluster_5__cluster_14 -125.0648 503.79472 0.0043583270 0.3016931 cluster_5
## cluster_4__cluster_14 -127.3751 105.47747 0.0085322807 0.5315611 cluster_4
## to
## cluster_25__cluster_1 cluster_1
## cluster_1__cluster_25 cluster_25
## cluster_25__cluster_13 cluster_13
## cluster_24__cluster_7 cluster_7
## cluster_13__cluster_25 cluster_25
## cluster_3__cluster_16 cluster_16
## cluster_16__cluster_3 cluster_3
## cluster_7__cluster_24 cluster_24
## cluster_5__cluster_14 cluster_14
## cluster_4__cluster_14 cluster_14
signifPlot(spicyTest)
cells <- lisaClust(cells, k = 10, Rs = c(20,50,100), sigma = 50,
spatialCoords = c("m.cx", "m.cy"), cellType = "clusters", BPPARAM = BPPARAM)
hatchingPlot(cells,
useImages = "Point2206_pt1116_31620",
cellType = "clusters",
spatialCoords = c("m.cx", "m.cy"),
window = "square",
nbp = 200
)
imagesToUse <- rownames(clinical)[clinical[, "Status"]%in%c("nonprogressor", "progressor")]
prop <- getProp(cells, feature = "region")
testRegion <- colTest(prop[imagesToUse,], condition = clinical[imagesToUse, "Status"])
testRegion
## W pval adjPval cluster
## region_6 150 0.0024 0.024 region_6
## region_4 450 0.0085 0.042 region_4
## region_7 240 0.2300 0.640 region_7
## region_9 370 0.2900 0.640 region_9
## region_10 250 0.3200 0.640 region_10
## region_8 340 0.5600 0.870 region_8
## region_2 280 0.6400 0.870 region_2
## region_1 320 0.7800 0.870 region_1
## region_5 290 0.7800 0.870 region_5
## region_3 300 0.8900 0.890 region_3
boxplot(prop[imagesToUse,rownames(testRegion)[1]]~clinical[imagesToUse, "Status"])
regionMap(cells, cellType = "clusters", limit = c(0.2,5))
data <- scFeatures(cells,
feature_types = c("proportion_raw", "gene_mean_celltype"),
sample = "imageID",
celltype = "clusters",
assay = "norm",
ncores = nCores )
## [1] "generating proportion raw features"
## [1] "generating gene mean celltype features"
imagesToUse <- rownames(clinical)[clinical[, "Status"]%in%c("nonprogressor", "progressor")]
test <- colTest(data$feature_gene_mean_celltype[imagesToUse,],
condition = clinical[imagesToUse, "Status"])
test |> head()
## W pval adjPval cluster
## cluster_17--CD8 180 0.00020 0.20 cluster_17--CD8
## cluster_17--PDL1 170 0.00057 0.29 cluster_17--PDL1
## cluster_23--GLUT1 140 0.00110 0.37 cluster_23--GLUT1
## cluster_16--CD11c 240 0.00190 0.48 cluster_16--CD11c
## cluster_25--HER2 210 0.00240 0.48 cluster_25--HER2
## cluster_23--pS6 160 0.00390 0.49 cluster_23--pS6
imagesToUse <- rownames(clinical)[clinical[, "Status"]%in%c("nonprogressor", "progressor")]
data[["regions"]] <- getProp(cells, "region")
data <- lapply(data, as.data.frame)
#Subset data
measurements <- lapply(data, function(x)x[imagesToUse, ])
names(measurements) <- c("prop", "mean", "region")
set.seed(51773)
cv <- crossValidate(measurements = measurements,
outcome = clinical[imagesToUse, "Status"],
nCores = nCores,
nFeatures = 20,
classifier = "randomForest")
ROCplot(cv, comparison = "Assay Name")